*-------------------------------------------------------------------------------
* Objective: Generate total natural resources rents as share of GDP
*-------------------------------------------------------------------------------

********************************************************************************
**# Step 1: Load total natural resources rents (% of GDP)
/*******************************************************************************
Source: World Bank WDI
Notes: We take 0s as missing values
       Data accessed 12.03.2020
*******************************************************************************/

* Import Data
import excel "raw_datasets/rrents.xlsx", sheet("Data") firstrow clear

* Replace 0s with missings
quietly{
	foreach var of varlist y1960-y2019{
	replace `var' = "" if `var' == ".." | `var' == "0"
	}
	destring y*, replace
}

* Format Data
rename CountryCode code_wb
rename y2019 y2020
reshape long y, i(code_wb) j(year)
rename y rrents

* Keep only desired countries
merge m:1 code_wb using "processed_datasets/ccode", keepusing(country_wb)
keep if _merge == 3
drop _merge country_wb CountryName SeriesName SeriesCode 

********************************************************************************
**# Step 2.1: Missing Data - Re-creating Former Sudan
/*******************************************************************************
This dataset only contains natural resources rents as a share of GDP, so we need 
GDP values for Sudan and South Sudan in order to create a weighted meassure. 
We obtain these from World Bank's WDI. Furthermore, we take South Sudan 2015 
values (for GDP and rrents) as representative for the years 2016, 2017 and 2018 
(the series for Sudan ends in 2018 so we don't need 2019 South Sudan)
*******************************************************************************/

replace rrents = 9.499028806 if year == 2012 & code_wb == "SDN"
replace rrents = 12.86935439 if year == 2013 & code_wb == "SDN"
replace rrents = 11.53969046 if year == 2014 & code_wb == "SDN"
replace rrents = 6.230474317 if year == 2015 & code_wb == "SDN"
replace rrents = 9.254365989 if year == 2016 & code_wb == "SDN"
replace rrents = 11.88800766 if year == 2017 & code_wb == "SDN"
replace rrents = 18.67632638 if year == 2018 & code_wb == "SDN"
drop if code_wb == "SSD"

********************************************************************************
**# Step 2.1: Missing Data - Approximating 2020
/*******************************************************************************
We approximate 2020 with the closest year post 2015, then use lin. interpolation
*******************************************************************************/

sort code_wb year
gen identi = 0
by code_wb: replace identi = 1 if rrents != . & rrents[60] == . & year == 2018
by code_wb: replace rrents = rrents[59] if rrents == . & year == 2020
by code_wb: replace identi = 1 if rrents != . & rrents[60] == . & year == 2017
by code_wb: replace rrents = rrents[58] if rrents == . & year == 2020
by code_wb: replace identi = 1 if rrents != . & rrents[60] == . & year == 2016
by code_wb: replace rrents = rrents[57] if rrents == . & year == 2020
replace rrents = . if identi == 1
drop identi

* Replacing with interpolation
sort code_wb year
by code_wb: ipolate rrents year, gen(intrents)
replace rrents = intrents if rrents == .
drop intrents

********************************************************************************
**# Step 3: Finalize dataset
********************************************************************************

keep if year == 1960 | year == 1965 | year == 1970 | year == 1975 | year == 1980 | year == 1985 | year == 1990 | year == 1995 ///
| year == 2000 | year == 2005 | year == 2010 | year == 2015 | year == 2020

save "processed_datasets/dataset_rrents", replace
